***************************************
Macro to read in 1964-1967 CPS -
Data from CPS Utilities. 
Program 1 of 4

4/29/10:  EDITED TO RUN ON JIM'S COMPUTER, ALSO MANY OTHER CHANGES.  
		  MAIN CHANGE WAS TO CORRECT FOR FACT THAT RELATED SUBFAMS ARE
		  IN SAME FAMILY AS PRIMARY FAMILIES FOR CALC OF # OVER 14 AND OVER 18.
***************************************;

/* Directory that holds the four raw March CPS extracts.  Change this one
   line when the data move or the program runs on another machine. */
%let cps6467_dir = C:\Users\worri\Dropbox\Poverty\SAS Programs CPS 64-67;

/* One fileref per survey year; the filerefs are passed to the read macro. */
filename mar64 "&cps6467_dir\mar64.raw";
filename mar65 "&cps6467_dir\mar65.raw";
filename mar66 "&cps6467_dir\mar66.raw";
filename mar67 "&cps6467_dir\mar67.raw";


*****************************************
MACRO TO READ IN DATA FOR CPS 1964-1967
*****************************************;
  %macro read(yr,dir);

  /* Read one raw March CPS extract into WORK.cps<yr>, one row per person.
       yr  - survey year; used in the data set name and for surv_yr / ref_year
       dir - fileref of the raw file
     The INPUT statement moves to a new input line after every variable, so a
     person occupies 56 consecutive lines of the raw file.  The double-quote
     character is the only field delimiter. */
  data cps&yr;
    infile &dir dlm='"';

    /* Storage lengths, listed in the order the variables are read.
       NOTE(review): on Windows and UNIX hosts a 3-byte numeric stores integers
       exactly only up to 8,192 -- confirm no 3-byte variable exceeds that. */
    length
      fam18    3    faminc   8    famwgt   8    fnumper  3
      hdager   3    hdsex    3    numper   3    psu      3
      hdage    3    hnrlct   3    _faminc  8    povcut   4
      fmifrm   3    fmise    3    fmiwag   3    region   3
      povoeo   3    famdesc  3    fmincs   3    child18  3
      cldr18   3    hhnum    3    sched    4    recnum   8
      famtyp   3    famnum   3    farm     3    income   8
      lineno   3    random   4    state    3    wgt      8
      wkslyr   3    year     3    age      3    _income  8
      _state   3    hhid   $ 7    famrel   3    relhd    3
      marstat  3    incuer   8    dunern   3    incfrm   8
      incse    8    incwag   8    rnowrk   3    _educ    3
      race     3    popstat  3    _popstat 3    _hhid  $ 7
      grdcom   3    grdhi    3    sex      3    keyftz   3
    ;

    /* Each slash advances to the next raw line before the next variable. */
    input
      fam18    / faminc   / famwgt   / fnumper  /
      hdager   / hdsex    / numper   / psu      /
      hdage    / hnrlct   / _faminc  / povcut   /
      fmifrm   / fmise    / fmiwag   / region   /
      povoeo   / famdesc  / fmincs   / child18  /
      cldr18   / hhnum    / sched    / recnum   /
      famtyp   / famnum   / farm     / income   /
      lineno   / random   / state    / wgt      /
      wkslyr   / year     / age      / _income  /
      _state   / hhid     / famrel   / relhd    /
      marstat  / incuer   / dunern   / incfrm   /
      incse    / incwag   / rnowrk   / _educ    /
      race     / popstat  / _popstat / _hhid    /
      grdcom   / grdhi    / sex      / keyftz
    ;

    /* Variable labels. */
    label
      recnum   = "Unique record ID number"
      fam18    = "Number of family members under 18"
      faminc   = "Total family income"
      famwgt   = "Family weight"
      fnumper  = "Number of persons in family"
      hdager   = "Age recode of head"
      hdsex    = "Gender of head"
      numper   = "Number of persons in household"
      psu      = "PSU No."
      hdage    = "Age of head/unrel indiv"
      hnrlct   = "# Nonrelatives of head of HH in HH"
      _faminc  = "Total family income"
      povcut   = "Poverty level cutoff dollar amount"
      fmifrm   = "Family income-farm"
      fmise    = "Family income-self-employment"
      fmiwag   = "Family income-wages & salaries"
      region   = "Region of the country "
      povoeo   = "OEO poverty code"
      famtyp   = "Kind of family, person level"
      famnum   = "Family ID within household"
      farm     = "Farm recode"
      income   = "Person's total income"
      lineno   = "Line number"
      random   = "Random cluster code (sched #)"
      state    = "State"
      wgt      = "March supplemental weight"
      wkslyr   = "Weeks worked last year"
      year     = "Year of CPS survey"
      age      = "Age of person"
      _income  = "Person's total income"
      _state   = "State recoded for all years"
      hhid     = "Household ID #2  long"
      famrel   = "Family relationship"
      relhd    = "Relationship to household head"
      marstat  = "Marital status"
      incuer   = "Income from unearned sources"
      dunern   = "Detailed sources of unearned income"
      incfrm   = "Income from farm/nonincorporated SE"
      incse    = "Income from nonfarm self-employ"
      incwag   = "Income from wage & salary"
      rnowrk   = "Reason did not work last year"
      _educ    = "Years of education"
      race     = "Race"
      famdesc  = "Family description"
      popstat  = "Population status"
      _popstat = "Population status"
      fmincs   = "Source of family income"
      _hhid    = "Household ID #2 long, recode"
      grdcom   = "Completed highest grade attended"
      grdhi    = "Highest grade attended"
      child18  = "Never married children in fam <18"
      cldr18   = "# related kids in fam <18 oth than own"
      sex      = "Sex"
      hhnum    = "Household ID #2 -item9"
      sched    = "Schedule number"
      keyftz   = "Keyfitz cluster (PSU #)"
    ;

    /* Position of the person in the raw file; a later step sorts by this
       variable to restore the original order. */
    pers_id = _N_;

    /* Fill in a missing family type from the family description.  The original
       author notes that famdesc never equals 2 and that the case is listed
       only for completeness. */
    if famtyp = . then do;
      select (famdesc);
        when (5) famtyp = 5;
        when (4) famtyp = 4;
        when (3) famtyp = 3;
        when (0) famtyp = 1;
        when (2) famtyp = 2;
        when (1) do;
          if famrel = 4 then famtyp = 1;
          else famtyp = 2;
        end;
        otherwise;
      end;
    end;

    /* Treat missing person-level and family-level income components as zero. */
    array _zerofill{*} incfrm incse incwag incuer fmifrm fmise fmiwag;
    do _k = 1 to dim(_zerofill);
      if _zerofill{_k} = . then _zerofill{_k} = 0;
    end;
    drop _k;

    /* Earnings totals: person level, then family level. */
    ptotearn       = sum(incfrm, incse, incwag);
    ftotearn_final = sum(fmifrm, fmise, fmiwag);

    /* Survey year and the year before it. */
    surv_yr  = &yr;
    ref_year = &yr - 1;

    /* head is 1 for household heads (relhd=1), for records with famrel=0, and
       for individuals (famtyp 4 or 5); otherwise 0. */
    head = (relhd = 1 or famrel = 0 or famtyp in (4,5));
  run;

/* Order persons so that the members of one family are adjacent; within a
   family the record with famrel=0 sorts first as long as famwgt and faminc
   are constant within the family. */
proc sort data=cps&yr;
  by hhid famtyp famwgt faminc famrel;
run;

/* Assign a family identifier.  famid takes the row number at each record that
   opens a new unit and is carried down to the rows that follow.
   Per the original author notes:
     famtyp=1 and famrel=0   head of primary family
     famtyp=2 and famrel=0   head of related subfamily
     famtyp=3 and famrel=0   head of secondary subfamily
     famtyp in (4,5)         primary and secondary individuals
   A one-person related subfamily (famtyp=2 and famrel=2) does not open a new
   unit: the 1964-1967 codebook requires a family to have two persons, so that
   case was deliberately left out of the condition. */
data cps&yr;
  set cps&yr;
  retain famid;

  if (famrel = 0 and famtyp in (1,2,3)) or famtyp in (4,5) then famid = _N_;

  /* 1967 only: the original author found 60 replicated records that are
     already present in the downloaded data, visible by restricting to psu=908
     with sched 8004 or 8002.  The sched=8004 copies are removed. */
  if &yr = 1967 and sched = 8004 and psu = 908 then delete;
run;

/* Give related-subfamily records (famtyp=2) the household count of
   nonrelatives (hnrlct).  In the raw data the value is only available for the
   primary family; related subfamilies carry the code 8 (missing).

   The previous version placed a RETAIN statement inside an IF/DO block.
   RETAIN is a compile-time declaration that cannot be made conditional, and a
   variable read by SET is overwritten on every observation anyway, so that
   step changed nothing and related subfamilies kept the value 8.  The
   clean6467 macro later computes numper-hnrlct for famtyp 1 and 2, so the
   value has to be carried explicitly. */
proc sort data=cps&yr;
  by hhid famid;
run;

data cps&yr;
  set cps&yr;
  by hhid;

  /* Last hnrlct seen on a source record of the current household. */
  retain _hnrlct_src;
  drop _hnrlct_src;

  /* Never carry a value across households. */
  if first.hhid then _hnrlct_src = .;

  /* Source records are the ones named in the original condition.  famid rises
     with famtyp inside a household, so the primary-family head always comes
     before the related-subfamily records. */
  if (famtyp = 1 and famrel = 0) or (famtyp = 3 and famrel = 0) or famtyp in (4,5)
    then _hnrlct_src = hnrlct;
  else if famtyp = 2 and _hnrlct_src ne . then hnrlct = _hnrlct_src;
run;

/* Restore the original raw-file order. */
proc sort data=cps&yr;
  by pers_id;
run;
 

%mend read;
/* Build WORK.cps1964 through WORK.cps1967, one call per survey year.  A macro
   call is not a statement, so no trailing semicolon is needed. */
%read(1964,mar64)
%read(1965,mar65)
%read(1966,mar66)
%read(1967,mar67)

**********************************
MACRO TO CLEAN DATA CPS 1964-1967
**********************************;
%macro clean6467(yr);


/* Person counts for primary families and related subfamilies (famtyp 1 and 2).
   Both family types are counted together within the household, so BY hhid.

   Each PROC MEANS now names age as the analysis variable.  Without a VAR
   statement PROC MEANS analyses every numeric variable and N= names only the
   first of them, so the count was really the number of non-missing values of
   whichever numeric variable is stored first (fam18, going by the LENGTH order
   in the read macro).  age is non-missing on every row the WHERE clauses keep,
   so N of age is the number of persons. */
proc sort data=cps&yr;
  by hhid;
run;

/* Persons aged 18 and over. */
proc means n noprint data=cps&yr;
  by hhid;
  where age ge 18 and famtyp in (1,2);
  var age;
  output out=family18up n=fam18up_1;
run;

/* Persons aged 14 and over. */
proc means n noprint data=cps&yr;
  by hhid;
  where age ge 14 and famtyp in (1,2);
  var age;
  output out=family14up n=fam14up_1;
run;

/* Persons aged 14 to 17 whose famrel is not 0, i.e. who are not family heads. */
proc means n noprint data=cps&yr;
  by hhid;
  where 14 le age le 17 and famrel ne 0 and famtyp in (1,2);
  var age;
  output out=kidcount n=kidcount_1;
run;

/* Attach the three counts to every person of the household.  A count is
   missing when no row of the household met the WHERE clause. */
data cps&yr;
  merge cps&yr(in=in1)
        family18up(keep=hhid fam18up_1)
        family14up(keep=hhid fam14up_1)
        kidcount(keep=hhid kidcount_1);
  by hhid;
  if in1;
run;


/* Person counts for the other family types, this time within famid.  The
   counts are computed for every famid; the following DATA step uses them only
   for famtyp 3, 4 and 5.

   Each PROC MEANS now names age as the analysis variable.  Without a VAR
   statement PROC MEANS analyses every numeric variable and N= names only the
   first of them, so the count was really the number of non-missing values of
   whichever numeric variable is stored first, not the number of persons.  age
   is non-missing on every row the WHERE clauses keep. */
proc sort data=cps&yr;
  by famid;
run;

/* Persons aged 18 and over. */
proc means n noprint data=cps&yr;
  by famid;
  where age ge 18;
  var age;
  output out=family18up n=fam18up_2;
run;

/* Persons aged 14 and over. */
proc means n noprint data=cps&yr;
  by famid;
  where age ge 14;
  var age;
  output out=family14up n=fam14up_2;
run;

/* Persons aged 14 to 17 whose famrel is not 0, i.e. who are not family heads. */
proc means n noprint data=cps&yr;
  by famid;
  where 14 le age le 17 and famrel ne 0;
  var age;
  output out=kidcount n=kidcount_2;
run;

/* Build the permanent person file cpsdata.cps<yr>: attach the famid-level
   counts, derive family size, the head and child weights, and rename several
   variables.  The libref cpsdata is not assigned in this file and must exist
   before the macro runs. */
data cpsdata.cps&yr;
  merge cps&yr(in=in1)
        family18up(keep=famid fam18up_2)
        family14up(keep=famid fam14up_2)
        kidcount(keep=famid kidcount_2);
  by famid;
  if in1;

  /* Household-level counts for primary families and related subfamilies,
     famid-level counts for everything else. */
  if famtyp in (1,2) then do;
    fam18up  = fam18up_1;
    fam14up  = fam14up_1;
    kidcount = kidcount_1;
  end;
  else if famtyp in (3,4,5) then do;
    fam18up  = fam18up_2;
    fam14up  = fam14up_2;
    kidcount = kidcount_2;
  end;

  if kidcount = . then kidcount = 0;

  if famtyp in (4,5) then do;
    /* Primary and secondary individuals are one-person units. */
    fpersons_final   = 1;
    familydifference = 0;
  end;
  else do;
    /* Author notes on the codes used below:
         famtyp 2 and 3        related subfamily and secondary family
         famtyp=1, famdesc=0   primary family without a related subfamily
         famtyp=1, famdesc=1   primary family with a related subfamily
       fnumper and fam18 are both compared against 7, which the author
       describes as the cap.  When only fnumper is at the cap, family size is
       rebuilt from persons 14 and over plus the reported under-18 count net of
       the 14-17 year olds already counted.  When both are at the cap (882
       persons per the author, all in famtyp 1 or 2) household size minus
       nonrelatives is used instead.  The author reports that no secondary
       family (famtyp=3) reaches the last branch. */
    if fnumper le 6 then
      fpersons_final = fnumper;
    else if fnumper ge 7 and fam18 le 6 then
      fpersons_final = fam14up + max(0, fam18 - kidcount);
    else if fnumper ge 7 and fam18 ge 7 and famtyp in (1,2) then
      fpersons_final = numper - hnrlct;
    else if fnumper ge 7 and fam18 ge 7 and famtyp = 3 then
      fpersons_final = fam14up + max(0, fam18 - kidcount);

    /* familydifference is family size minus persons 14 and over.  The author
       assumes children under 14 have the same individual weight as the head,
       and assigns all of them to the related subfamily when one is present,
       hence zero for a primary family with a related subfamily. */
    if famtyp in (2,3) or (famtyp = 1 and famdesc = 0) then
      familydifference = fpersons_final - fam14up;
    else if famtyp = 1 and famdesc = 1 then
      familydifference = 0;
  end;

  /* Person weight.  The head of the unit that holds the under-14 children is
     weighted up by their number; everyone else keeps wgt/100. */
  if familydifference gt 0
     and ((relhd = 1 and famtyp = 1) or (famrel = 0 and famtyp in (2,3)))
    then pweight = (1 + familydifference) * wgt / 100;
  else pweight = wgt / 100;

  /* Adjusted number of children under 18, for poverty-by-age analysis. */
  if fam18 le 6 then fam_18_adj = fam18;
  else if fam18 ge 7 then fam_18_adj = max(fam18, fpersons_final - fam18up);

  /* Weight for children under 18.  A record with famrel 0 or 4 carries the
     under-14 children (plus itself when aged 17 or less); persons aged 14 to
     17 carry their own weight; everyone else contributes zero. */
  if famrel in (0,4) and familydifference gt 0 and age le 17 then
    kid_wgt = (1 + familydifference) * wgt / 100;
  else if famrel in (0,4) and familydifference gt 0 and age ge 18 then
    kid_wgt = familydifference * wgt / 100;
  else if 14 le age le 17 then
    kid_wgt = wgt / 100;
  else
    kid_wgt = 0;

  /* Renames apply to the output data set only; the statements above keep
     using the old names. */
  rename hdsex  = sex_head
         hdager = age_head_code
         fam18  = tot_kids
         income = ptotinc
         povcut = official_povcut
         rnowrk = rsnnotw
         wkslyr = wkswrk;

  ref_year = &yr - 1;
run;

/* Household total of kid_wgt over persons in primary families and related
   subfamilies (famtyp 1 and 2), attached to every person of the household as
   kid_wgt_tot1. */
proc sort data=cpsdata.cps&yr;
  by hhid;
run;

/* PROC SUMMARY with a VAR statement writes the same output data set as
   PROC MEANS NOPRINT. */
proc summary data=cpsdata.cps&yr;
  where famtyp in (1,2);
  by hhid;
  var kid_wgt;
  output out=kid_wgt1 sum=kid_wgt_tot1;
run;

data cpsdata.cps&yr;
  merge cpsdata.cps&yr(in=in1)
        kid_wgt1(keep=hhid kid_wgt_tot1);
  by hhid;
  if in1;
run;

/* Family total of kid_wgt within famid (kid_wgt_tot2), then the final
   kid_wgt_tot chosen by family type. */
proc sort data=cpsdata.cps&yr;
  by famid;
run;

proc means sum noprint data=cpsdata.cps&yr;
  by famid;
  var kid_wgt;
  output out=kid_wgt2 sum=kid_wgt_tot2;
run;

data cpsdata.cps&yr;
  merge cpsdata.cps&yr(in=in1)
        kid_wgt2(keep=famid kid_wgt_tot2);
  by famid;
  if in1;

  /* Primary families and related subfamilies use the household-level total;
     secondary families and individuals use the famid-level total.  The second
     branch previously assigned kid_wgt_tot1 again, which left kid_wgt_tot2
     unused and gave famtyp 3, 4 and 5 a total built only from famtyp 1 and 2
     persons of the household. */
  if famtyp in (1,2) then kid_wgt_tot = kid_wgt_tot1;
  else if famtyp in (3,4,5) then kid_wgt_tot = kid_wgt_tot2;
run;

%mend clean6467;
/* Clean each survey year and write cpsdata.cps1964 through cpsdata.cps1967.
   A macro call is not a statement, so no trailing semicolon is needed. */
%clean6467(1964)
%clean6467(1965)
%clean6467(1966)
%clean6467(1967)
